Simulation Result

Comparison of Causal Discovery Agorithms

Author

Kyuri Park

Published

January 22, 2023

1 Simualtion Design

1.1 Sparse 5 nodes model

See code here.
par(oma=c(0, 0, 4, 0))
## Specify B matrix
B5sparse = matrix(c(0, 0, 0, 0, 0,
                 1, 0, 0.8, 0, 0,
                 0, 0, 0, 0.9, 0,
                 0, 0.7, 0, 0, 1.5,
                 0, 0, 0, 0, 0), 5, 5, byrow = T)

colnames(B5sparse) <- c("X1", "X2", "X3", "X4", "X5")

# specify layout
layout5 = matrix(c(0,1,
                   0,0,
                   1,-1,
                   2,0,
                   2,1),5,2,byrow = T)

true5psparse <- qgraph::qgraph(t(B5sparse), layout=layout5, labels = colnames(B5sparse), theme="colorblind", vsize = 8, asize = 5)
title("True DCG",  font.main = 1, cex.main = 2, line = 2, outer=TRUE)
## True PAG
truepag_5psparse <- matrix(c(
       0, 2, 2, 0, 0,
       3, 0, 3, 3, 3,
       3, 3, 0, 3, 0,
       0, 3, 3, 0, 3,
       0, 2, 0, 2, 0), 5, 5, byrow = TRUE)
dimnames(truepag_5psparse) <- list(paste("X", 1:5, sep=""), paste("X", 1:5, sep=""))
plotAG(truepag_5psparse)
title(main = "True PAG", font.main = 1, cex.main = 2, line = 2, outer=TRUE)

1.2 Dense 5 nodes model

See code here.
par(oma=c(0, 0, 4, 0))
## Specify B matrix
B5dense = matrix(c(0, 0, 0, 0, 0,
                    1, 0, 0.8, 0, 0,
                    0, 0, 0, 0.9, 0,
                    0, 0.7, 0, 0, 1.5,
                    1, 0, 0, 0, 0), 5, 5, byrow = T)

colnames(B5dense) <- c("X1", "X2", "X3", "X4", "X5")

true5pdense <- qgraph(t(B5dense), layout=layout5, labels = colnames(B5dense), theme="colorblind", vsize = 8, asize = 5)
title("True DCG",  font.main = 1, cex.main = 2, line = 2, outer=TRUE)
## True PAG
truepag_5pdense <- matrix(c(
  0, 2, 2, 0, 2,
  3, 0, 3, 3, 3,
  3, 3, 0, 3, 0,
  0, 3, 3, 0, 3,
  3, 2, 0, 2, 0), 5, 5, byrow = TRUE)
dimnames(truepag_5pdense) <- list(paste("X", 1:5, sep=""), paste("X", 1:5, sep=""))
plotAG(truepag_5pdense)
title(main = "True PAG", font.main = 1, cex.main = 2, line = 2, outer=TRUE)

1.3 Sparse 10 nodes model

See code here.
par(oma=c(0, 0, 4, 0))
## Specify B matrix
B10sparse = matrix(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                  0.4, 0.8, 0, 0, 0, 0, 0, 0, 0, 0, 
                  0, 0, 0.7, 0, 0, 0.9, 0, 0, 0, 0, 
                  0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 
                  0, 0, 0, 0, 0.2, 0, 0.5, 0, 0, 0, 
                  0, 0, 0, 0, 0, 0, 0, 1, 0.8, 0, 
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0.4, 
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 10, 10, byrow = T)

dimnames(B10sparse) <- list(paste("X", 1:10, sep=""), paste("X", 1:10, sep=""))

# specify layout
layout10 = matrix(c(0,1,
                      2,1,
                      1,0,
                      2,-1,
                      3,0,
                      4, -1,
                      5, 0,
                      6, -1,
                      4, 1,
                      7, 1),10,2,byrow = T)

true10psparse <- qgraph(t(B10sparse), layout = layout10, labels = colnames(B10sparse), theme="colorblind", vsize = 8, asize = 5)
title("True DCG",  font.main = 1, cex.main = 2, line = 2, outer=TRUE)
## True PAG
truepag_10psparse <- matrix(c(
         0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 
         3, 3, 0, 2, 0, 0, 0, 0, 0, 0, 
         0, 0, 3, 0, 3, 3, 0, 0, 0, 0, 
         0, 0, 0, 3, 0, 3, 0, 0, 0, 0, 
         0, 0, 0, 3, 3, 0, 3, 0, 0, 0, 
         0, 0, 0, 0, 0, 2, 0, 3, 3, 0, 
         0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 
         0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 
         0, 0, 0, 0, 0, 0, 0, 0, 2, 0), 10, 10, byrow = T)

dimnames(truepag_10psparse) <- list(paste("X", 1:10, sep=""), paste("X", 1:10, sep=""))
plotAG(truepag_10psparse)
title(main = "True PAG", font.main = 1, cex.main = 2, line = 2, outer=TRUE)

1.4 Dense 10 nodes model

See code here.
par(oma=c(0, 0, 4, 0))
## Specify B matrix
B10dense = matrix(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                     0.4, 0.8, 0, 0, 0, 0, 0, 0, 0, 0, 
                     0, 0, 0.7, 0, 0, 0.9, 0, 0, 0, 0, 
                     0, 0.4, 0, 1, 0, 0, 0, 0, 0, 0, 
                     0, 0, 0, 0, 0.9, 0, 0.5, 0, 0, 0, 
                     0, 0, 0, 0, 0, 0, 0, 1, 0.8, 1, 
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0.6, 
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0.4, 
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 10, 10, byrow = T)

colnames(B10dense) <- c(paste("X", 1:10, sep=""))

# specify layout
layout10 = matrix(c(0,1,
                    2,1,
                    1,0,
                    2,-1,
                    3,0,
                    4, -1,
                    5, 0,
                    6, -1,
                    4, 1,
                    7, 1),10,2,byrow = T)

true10pdense <- qgraph(t(B10dense), layout = layout10, labels = colnames(B10dense), theme="colorblind", vsize = 8, asize = 5)
title("True DCG",  font.main = 1, cex.main = 2, line = 2, outer=TRUE)
## True PAG
truepag_10pdense <- matrix(c(
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 
  3, 3, 0, 2, 0, 2, 0, 0, 0, 0, 
  0, 3, 3, 0, 3, 3, 0, 0, 0, 0, 
  0, 3, 0, 3, 0, 3, 3, 0, 0, 0, 
  0, 0, 0, 3, 3, 0, 3, 0, 0, 0, 
  0, 0, 0, 0, 0, 2, 0, 3, 3, 3, 
  0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 
  0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 
  0, 0, 0, 0, 0, 0, 2, 2, 2, 0), 10, 10, byrow = T)

dimnames(truepag_10pdense) <- list(paste("X", 1:10, sep=""), paste("X", 1:10, sep=""))
plotAG(truepag_10pdense)
title(main = "True PAG", font.main = 1, cex.main = 2, line = 2, outer=TRUE)

1.5 5 nodes with a LV model

See code here.
par(oma=c(0, 0, 4, 0))
## Specify B matrix
B5_lv2 = matrix(c(0, 0, 0, 0, 0, 1,
                  0, 0, 0, 0, 0, 1,
                  0, 0.5, 0, 0, 0.6, 0,
                  1, 0, 0, 0, 0.5, 0,
                  0, 0, 0, 0.5, 0,0,
                  0,0,0,0,0,0), 6, 6, byrow = T)

colnames(B5_lv2) <- c("X1", "X2", "X3", "X4", "X5", "L1")
# specify layout
layout5_lv2 = matrix(c(0,1,
                       0,0,
                       1,0.5,
                       2,1,
                       2,0,
                       -1, 0.5),6,2,byrow = T)

true5p_lv2 <- qgraph(t(B5_lv2), layout=layout5_lv2, labels = colnames(B5_lv2), theme="colorblind", vsize = 8, asize = 5)
title("True DCG",  font.main = 1, cex.main = 2, line = 2, outer=TRUE)
## True PAG
truepag_5pLV2 <- matrix(c(
  0, 2, 2, 0, 0,
  2, 0, 3, 3, 3,
  2, 3, 0, 3, 0,
  0, 3, 3, 0, 3,
  0, 2, 0, 2, 0), 5, 5, byrow = TRUE)
dimnames(truepag_5pLV2) <- list(paste("X", 1:5, sep=""), paste("X", 1:5, sep=""))
plotAG(truepag_5pLV2)
title(main = "True PAG", font.main = 1, cex.main = 2, line = 2, outer=TRUE)

1.6 10 nodes with a LV model

See code here.
par(oma=c(0, 0, 4, 0))
## Specify B matrix
B10_lv = matrix(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                  0.4, 0.8, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                  0, 0, 0.7, 0, 0, 0.9, 0, 0, 0, 0, 0,
                  0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0.2, 0, 0.5, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0, 0, 1, 0.8, 0, 0,
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.8,
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0.4, 0,
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2,
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 11, 11, byrow = T)

colnames(B10_lv) <- c(paste("X", 1:10, sep=""), "L1")

# specify layout
layout10LV = matrix(c(0,1,
                      2,1,
                      1,0,
                      2,-1,
                      3,0,
                      4, -1,
                      5, 0,
                      6, -1,
                      4, 1,
                      7, 1,
                      8, 0),11,2,byrow = T)

true10pLV <- qgraph(t(B10_lv), layout = layout10LV, labels = colnames(B10_lv), theme="colorblind", vsize = 8, asize = 5)
title("True DCG",  font.main = 1, cex.main = 2, line = 2, outer=TRUE)
## True PAG
truepag_10pLV <- matrix(c(
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 
  3, 3, 0, 2, 0, 2, 0, 0, 0, 0, 
  0, 0, 3, 0, 3, 3, 0, 0, 0, 0, 
  0, 0, 0, 3, 0, 3, 3, 0, 0, 0, 
  0, 0, 3, 3, 3, 0, 3, 0, 0, 0, 
  0, 0, 0, 0, 2, 2, 0, 3, 3, 0, 
  0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 
  0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 
  0, 0, 0, 0, 0, 0, 0, 2, 2, 0), 10, 10, byrow = T)

dimnames(truepag_10pLV) <- list(paste("X", 1:10, sep=""), paste("X", 1:10, sep=""))
plotAG(truepag_10pLV)
title(main = "True PAG", font.main = 1, cex.main = 2, line = 2, outer=TRUE)


2 Simulation Result

See code here.
## Precision & Recall
# compute the average precision and sd
results <- list(res_ccd5psparse, res_fci5psparse, res_cci5psparse, res_ccd10psparse, res_fci10psparse, res_cci10psparse, res_ccd5pdense, res_fci5pdense, res_cci5pdense, res_ccd10pdense, res_fci10pdense, res_cci10pdense, res_ccd5pLV2, res_fci5pLV2, res_cci5pLV2, res_ccd10pLV,  res_fci10pLV, res_cci10pLV) %>% 
  map(~
  # think about how to deal with NAs or do I want to define sth. else instead of NAs.
  na.omit(.x) %>% 
  summarise(across(everything(.x), list(mean = mean, sd = sd)))
) %>% bind_rows() %>% 
  mutate(algorithm = rep(c("ccd", "fci", "cci"), 6),
         condition = rep(c("5p_sparse", "10p_sparse", "5p_dense", "10p_dense", "5p_LV", "10p_LV"), each=3)) %>% 
  # brings the algorithm and condition names first
  relocate(where(is.character), .before = where(is.numeric))

## Uncertainty
uncertainties <- data.frame(uncer_ccd5psparse, uncer_fci5psparse, uncer_cci5psparse, uncer_ccd10psparse, uncer_fci10psparse, uncer_cci10psparse, uncer_ccd5pdense, uncer_fci5pdense, uncer_cci5pdense, uncer_ccd10pdense, uncer_fci10pdense, uncer_cci10pdense, uncer_ccd5pLV2, uncer_fci5pLV2, uncer_cci5pLV2, uncer_ccd10pLV, uncer_fci10pLV, uncer_cci10pLV) %>% 
  summarise(across(everything(), list(mean = mean, sd = sd))) %>% tidyr::pivot_longer(everything()) %>%
  mutate(algorithm = substr(stringr::str_split(name, "_", simplify = T)[,2], 1, 3),
         condition = substring(stringr::str_split(name, "_", simplify = T)[,2], 4, last = 1000000L), 
         statistics = stringr::str_split(name, "_", simplify = T)[,3]
  )%>% dplyr::select(-name) %>% tidyr::pivot_wider(names_from= statistics, values_from=value)


## SHD
SHDs <- data.frame(SHD_ccd5psparse, SHD_fci5psparse, SHD_cci5psparse, SHD_ccd10psparse, SHD_fci10psparse, SHD_cci10psparse, SHD_ccd5pdense, SHD_fci5pdense, SHD_cci5pdense, SHD_ccd10pdense, SHD_fci10pdense, SHD_cci10pdense, SHD_ccd5pLV2, SHD_fci5pLV2, SHD_cci5pLV2, SHD_ccd10pLV, SHD_fci10pLV, SHD_cci10pLV) %>% 
  summarise(across(everything(), list(mean = mean, sd = sd))) %>%  tidyr::pivot_longer(cols = everything()) %>%
  mutate(algorithm = substr(stringr::str_split(name, "_", simplify = T)[,2], 1, 3),
         condition = substring(stringr::str_split(name, "_", simplify = T)[,2], 4, last = 1000000L), 
         statistics = stringr::str_split(name, "_", simplify = T)[,3]
  ) %>% dplyr::select(-name) %>% tidyr::pivot_wider(names_from= statistics, values_from=value)
See code here.
## Plot the results
library(ggplot2)
## Specify my custom theme
MyTheme <-  theme(plot.title = element_text(face = "bold", size=13, hjust = 0.5),
                  plot.subtitle = element_text(face = "italic"),
                  axis.text=element_text(face = "bold"),
                  legend.text = element_text(face = "bold", size=12))

## ========================
## WITHOUT LV CONDITION
## ========================

## WITHOUT LV CONDITION: PRECISION
p1 <- results %>% 
  # exclude LV conditions
  filter(!grepl("LV", condition)) %>% 
ggplot(aes(x= factor(condition, levels = c("5p_sparse", "5p_dense", "10p_sparse", "10p_dense")), y=average_precision_mean, group = algorithm, colour = algorithm)) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  # in this case, N = 1000 (hence, sqrt(1000))
  geom_errorbar(aes(ymin=average_precision_mean-qnorm(0.975)*average_precision_sd/sqrt(1000), ymax=average_precision_mean+qnorm(0.975)*average_precision_sd/sqrt(1000)), width=0.1) +
  labs(x="", y="", title = "Without a Latent Variable Condition", subtitle = "Without LV_PRECISION") +
  theme_classic() + MyTheme

## WITHOUT LV CONDITION: RECALL
p2 <- results %>% 
  # exclude LV conditions
  filter(!grepl("LV", condition)) %>% 
  ggplot(aes(x= factor(condition, levels = c("5p_sparse", "5p_dense", "10p_sparse", "10p_dense")), y=average_recall_mean, group = algorithm, colour = algorithm)) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  # in this case, N = 1000 (hence, sqrt(1000))
  geom_errorbar(aes(ymin=average_recall_mean-qnorm(0.975)*average_recall_sd/sqrt(1000), ymax=average_recall_mean+qnorm(0.975)*average_recall_sd/sqrt(1000)), width=0.1) +
  labs(x="", y="", title = "", subtitle = "Without LV_RECALL") +
  theme_classic() + MyTheme

## WITHOUT LV CONDITION: UNCERTAINTY
p3 <- uncertainties %>% 
  # exclude LV conditions
  filter(!grepl("LV", condition)) %>% 
  ggplot(aes(x= factor(condition, levels = c("5psparse", "5pdense", "10psparse", "10pdense")), y=mean, group = algorithm, colour = algorithm)) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  # in this case, N = 1000 (hence, sqrt(1000))
  geom_errorbar(aes(ymin=mean-qnorm(0.975)*sd/sqrt(1000), ymax=mean+qnorm(0.975)*sd/sqrt(1000)), width=0.1) +
  labs(x="", y="", title = "", subtitle = "Without LV_UNCERTAINTY") +
  theme_classic() + MyTheme

## WITHOUT LV CONDITION: SHD
p4 <- SHDs %>% 
  # exclude LV conditions
  filter(!grepl("LV", condition)) %>% 
  ggplot(aes(x= factor(condition, levels = c("5psparse", "5pdense", "10psparse", "10pdense")), y=mean, group = algorithm, colour = algorithm)) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  # in this case, N = 1000 (hence, sqrt(1000)) 
  geom_errorbar(aes(ymin=mean-qnorm(0.975)*sd/sqrt(1000), ymax=mean+qnorm(0.975)*sd/sqrt(1000)), width=0.1) +
  labs(x="", y="", title = "", subtitle = "Without LV_SHD") +
  theme_classic() + MyTheme


## ========================
## WITH LV CONDITION
## ========================

## WITH LV CONDITION: PRECISION
p5 <- results %>% 
  # exclude LV conditions
  filter(grepl("LV", condition)) %>% 
  ggplot(aes(x= factor(condition, levels = c("5p_LV", "10p_LV")), y=average_precision_mean, group = algorithm, colour = algorithm)) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  # in this case, N = 1000 (hence, sqrt(1000))
  geom_errorbar(aes(ymin=average_precision_mean-qnorm(0.975)*average_precision_sd/sqrt(1000), ymax=average_precision_mean+qnorm(0.975)*average_precision_sd/sqrt(1000)), width=0.1) +
  labs(x="", y="", title = "With a Latent Variable Condition", subtitle = "With LV_PRECISION") +
  theme_classic() + MyTheme

## WITH LV CONDITION: RECALL
p6 <- results %>% 
  # exclude LV conditions
  filter(grepl("LV", condition)) %>% 
  ggplot(aes(x= factor(condition, levels = c("5p_LV", "10p_LV")), y=average_recall_mean, group = algorithm, colour = algorithm)) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  # in this case, N = 1000 (hence, sqrt(1000))
  geom_errorbar(aes(ymin=average_recall_mean-qnorm(0.975)*average_recall_sd/sqrt(1000), ymax=average_recall_mean+qnorm(0.975)*average_recall_sd/sqrt(1000)), width=0.1) +
  labs(x="", y="", title = "", subtitle = "With LV_RECALL") +
  theme_classic() + MyTheme

## WITH LV CONDITION: UNCERTAINTY
p7 <- uncertainties %>% 
  # exclude LV conditions
  filter(grepl("LV", condition)) %>% 
  ggplot(aes(x= factor(condition, levels = c("5pLV2", "10pLV")), y=mean, group = algorithm, colour = algorithm)) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  # in this case, N = 1000 (hence, sqrt(1000))
  geom_errorbar(aes(ymin=mean-qnorm(0.975)*sd/sqrt(1000), ymax=mean+qnorm(0.975)*sd/sqrt(1000)), width=0.1) +
  labs(x="", y="", title = "", subtitle = "With LV_UNCERTAINTY") +
  theme_classic() + MyTheme

## WITH LV CONDITION: SHD
p8 <- SHDs %>% 
  # exclude LV conditions
 filter(grepl("LV", condition)) %>% 
  ggplot(aes(x= factor(condition, levels = c("5pLV2", "10pLV")), y=mean, group = algorithm, colour = algorithm)) +
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  geom_line(aes(group = algorithm)) +
  geom_point() +
  # in this case, N = 1000 (hence, sqrt(1000))
  geom_errorbar(aes(ymin=mean-qnorm(0.975)*sd/sqrt(1000), ymax=mean+qnorm(0.975)*sd/sqrt(1000)), width=0.1) +
  labs(x="", y="", title = "", subtitle = "With LV_SHD") +
  theme_classic() + MyTheme

# combine plots
ggarrange(p1, p5, p2, p6, p3, p7, p4, p8,
                    ncol = 2, nrow = 4, common.legend = TRUE, legend = "bottom")

Figure 1. Simuation results

See code here.
## plot the results
times %>%  
  ggplot(aes(x=factor(condition, levels= c("5psparse", "5pdense", "10psparse", "10pdense", "5pLV", "10pLV")), y = log(time), col= factor(algorithm))) +
  geom_boxplot(position = "dodge",   outlier.size = 0.8, outlier.alpha = 0.2) + theme_classic() +
  # scale_x_discrete(name ="Condition", 
  #                  labels=c("", "5p-sparse", "", "","5p-dense","","", "10p-sparse","","","10p-dense","","","5p-LV","","","10p-LV","")) + 
  scale_colour_manual(values = c("#FF0000", "#00A08A", "#F2AD00"), name= "") +
  labs(y = " log(ms)", x = "", title = "Algorithm Running Time", subtitle = "Time in milliseconds (ms)") + 
  theme(axis.text.x = element_text(face = "bold", margin = margin(t = 13), size=10),
        legend.position="bottom",
        legend.text = element_text(face = "bold"))

Figure 2. Algorithm running time


3 Empirical Example

See code here.
# empirical data 408 rows by 26 columns (p = 26)
mcnally <- read.csv("../data/McNally.csv") 
# check the data
#glimpse(mcnally)
skimr::skim(mcnally)
Data summary
Name mcnally
Number of rows 408
Number of columns 26
_______________________
Column type frequency:
numeric 26
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
onset 0 1 1.20 1.07 0 0 1 2.00 3 ▇▆▁▆▃
middle 0 1 1.44 1.07 0 0 2 2.00 3 ▆▃▁▇▃
late 0 1 0.81 1.07 0 0 0 2.00 3 ▇▂▁▂▂
hypersom 0 1 1.01 0.99 0 0 1 2.00 3 ▇▇▁▅▂
sad 0 1 1.55 0.94 0 1 2 2.00 3 ▃▇▁▇▅
decappetite 0 1 0.49 0.73 0 0 0 1.00 3 ▇▃▁▁▁
incappetite 0 1 0.44 0.87 0 0 0 0.00 3 ▇▁▁▁▁
weightloss 0 1 0.50 0.94 0 0 0 1.00 3 ▇▁▁▁▁
weightgain 0 1 0.67 1.04 0 0 0 1.00 3 ▇▂▁▂▁
concen 0 1 1.48 0.87 0 1 1 2.00 3 ▃▇▁▇▂
guilt 0 1 1.56 1.17 0 1 1 3.00 3 ▆▇▁▃▇
suicide 0 1 0.63 0.82 0 0 0 1.00 3 ▇▅▁▂▁
anhedonia 0 1 1.27 1.05 0 0 1 2.00 3 ▇▇▁▅▅
fatigue 0 1 1.33 0.95 0 1 1 2.00 3 ▅▇▁▆▃
retard 0 1 0.66 0.81 0 0 0 1.00 3 ▇▅▁▂▁
agitation 0 1 1.10 0.93 0 0 1 2.00 3 ▅▇▁▃▂
obtime 0 1 2.95 0.89 0 2 3 4.00 4 ▁▁▆▇▇
obinterfer 0 1 2.69 0.83 0 2 3 3.00 4 ▁▁▆▇▃
obdistress 0 1 2.81 0.76 0 2 3 3.00 4 ▁▁▅▇▃
obresist 0 1 1.98 0.93 0 1 2 2.25 4 ▁▃▇▃▁
obcontrol 0 1 2.67 0.76 0 2 3 3.00 4 ▁▁▆▇▂
comptime 0 1 2.60 0.93 0 2 3 3.00 4 ▁▂▇▇▅
compinterf 0 1 2.58 0.88 0 2 3 3.00 4 ▁▂▆▇▂
compdis 0 1 2.71 0.84 0 2 3 3.00 4 ▁▁▅▇▂
compresis 0 1 2.16 0.89 0 2 2 3.00 4 ▁▂▇▅▁
compcont 0 1 2.60 0.76 0 2 3 3.00 4 ▁▁▆▇▂
See code here.
# separate dep / ocd symptoms
depression <- mcnally[,1:16]
ocd <- mcnally[,17:26]
See code here.
## run ccd algorithm on entire mcnally (discrete)
ccd_mcnally <- ccdKP(df=mcnally, dataType = "discrete") 
matccd_mcnally <- CreateAdjMat(ccd_mcnally, p = 26)

## run fci algorithm on entire mcnally (discrete)
fci_mcnally <- tetradrunner(algoId = 'fci', df = mcnally,
                          dataType = 'discrete')
matfci_mcnally <- CreateAdjMat(fci_mcnally, p = ncol(mcnally))

## run ccd on depression symptoms
ccd_mcnally_dep <- ccdKP(df=depression, dataType = "discrete") 
mat_mcnally_dep <- CreateAdjMat(ccd_mcnally_dep, p = ncol(depression))

## run fci on depression symptoms
fci_mcdep <- tetradrunner(algoId = 'fci', df = depression,
                          dataType = 'discrete')
matfci_mcdep <- CreateAdjMat(fci_mcdep, p = ncol(depression))

## run ccd on ocd symptoms
ccd_mcnally_ocd <- ccdKP(df=ocd, dataType = "discrete") 
mat_mcnally_ocd <- CreateAdjMat(ccd_mcnally_ocd, p = ncol(ocd))

## run fci on ocd symptoms
fci_mcocd <- tetradrunner(algoId = 'fci', df = ocd,
                          dataType = 'discrete')
matfci_mcocd <- CreateAdjMat(fci_mcocd, p = ncol(ocd))
See code here.
## CCD PAG for the entire Mcnally
plotPAG(ccd_mcnally, matccd_mcnally) 

Figure 3. CCD PAG on entire McNally data

See code here.
## FCI PAG for the entire Mcnally
plotPAG(fci_mcnally, matfci_mcnally) 

Figure 4. FCI PAG on entire McNally data

See code here.
## CCD PAG for the depression symptoms
plotPAG(ccd_mcnally_dep, mat_mcnally_dep) 

## FCI PAG for the depression symptoms
plotPAG(fci_mcdep, matfci_mcdep)

## CCD PAG for the ocd symptoms
plotPAG(ccd_mcnally_ocd, mat_mcnally_ocd) 

## FCI PAG for the ocd symptoms
plotPAG(fci_mcocd, matfci_mcocd)

(a) CCD PAG_depression symptoms

(b) FCI PAG_depression symptoms

(c) CCD PAG_OCD symptoms

(d) FCI PAG_OCD symptoms

Figure 5. Separate PAGs on depression and OCD symptoms

4 Session info

sessionInfo()
R version 4.2.2 (2022-10-31)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.0

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] Rgraphviz_2.40.0    graph_1.74.0        BiocGenerics_0.42.0
 [4] DOT_0.1             rcausal_1.2.1       devtools_2.4.5     
 [7] usethis_2.1.6       rJava_1.0-6         dplyr_1.0.9        
[10] purrr_0.3.4         ggpubr_0.4.0        ggplot2_3.3.6      
[13] pcalg_2.7-8         qgraph_1.9.2       

loaded via a namespace (and not attached):
  [1] colorspace_2.0-3    ggsignif_0.6.3      deldir_1.0-6       
  [4] ellipsis_0.3.2      htmlTable_2.4.1     corpcor_1.6.10     
  [7] base64enc_0.1-3     fs_1.5.2            clue_0.3-63        
 [10] rstudioapi_0.13     farver_2.1.1        lavaan_0.6-12      
 [13] remotes_2.4.2       fansi_1.0.3         splines_4.2.2      
 [16] mnormt_2.1.0        cachem_1.0.6        robustbase_0.95-0  
 [19] knitr_1.40          glasso_1.11         pkgload_1.3.0      
 [22] Formula_1.2-4       jsonlite_1.8.0      broom_1.0.0        
 [25] cluster_2.1.4       png_0.1-7           sfsmisc_1.1-14     
 [28] shiny_1.7.2         compiler_4.2.2      backports_1.4.1    
 [31] assertthat_0.2.1    Matrix_1.5-1        fastmap_1.1.0      
 [34] cli_3.4.1           later_1.3.0         prettyunits_1.1.1  
 [37] htmltools_0.5.2     tools_4.2.2         igraph_1.3.5       
 [40] gtable_0.3.0        glue_1.6.2          reshape2_1.4.4     
 [43] V8_4.2.1            Rcpp_1.0.9          carData_3.0-5      
 [46] vctrs_0.4.1         nlme_3.1-160        psych_2.2.5        
 [49] xfun_0.31           stringr_1.4.1       ps_1.7.1           
 [52] ggm_2.5             mime_0.12           miniUI_0.1.1.1     
 [55] lifecycle_1.0.1     gtools_3.9.3        rstatix_0.7.0      
 [58] DEoptimR_1.0-11     scales_1.2.0        promises_1.2.0.1   
 [61] parallel_4.2.2      RBGL_1.72.0         RColorBrewer_1.1-3 
 [64] curl_4.3.2          yaml_2.3.5          memoise_2.0.1      
 [67] pbapply_1.5-0       gridExtra_2.3       bdsmatrix_1.3-6    
 [70] rpart_4.1.19        fastICA_1.2-3       latticeExtra_0.6-30
 [73] stringi_1.7.8       highr_0.9           checkmate_2.1.0    
 [76] pkgbuild_1.3.1      repr_1.1.4          rlang_1.0.6        
 [79] pkgconfig_2.0.3     evaluate_0.15       lattice_0.20-45    
 [82] labeling_0.4.2      htmlwidgets_1.5.4   cowplot_1.1.1      
 [85] tidyselect_1.1.2    processx_3.7.0      plyr_1.8.7         
 [88] magrittr_2.0.3      R6_2.5.1            profvis_0.3.7      
 [91] generics_0.1.3      Hmisc_4.7-0         DBI_1.1.3          
 [94] pillar_1.7.0        foreign_0.8-83      withr_2.5.0        
 [97] survival_3.4-0      abind_1.4-5         nnet_7.3-18        
[100] tibble_3.1.7        crayon_1.5.1        car_3.1-0          
[103] interp_1.1-2        fdrtool_1.2.17      utf8_1.2.2         
[106] urlchecker_1.0.1    rmarkdown_2.16      jpeg_0.1-9         
[109] data.table_1.14.2   pbivnorm_0.6.0      callr_3.7.1        
[112] digest_0.6.29       xtable_1.8-4        tidyr_1.2.0        
[115] httpuv_1.6.5        stats4_4.2.2        munsell_0.5.0      
[118] skimr_2.1.4         sessioninfo_1.2.2